home *** CD-ROM | disk | FTP | other *** search
- #
- # Patterns.py
- # JunkMatcher
- #
- # Created by Benjamin Han on 2/1/05.
- # Copyright (c) 2005 Benjamin Han. All rights reserved.
- #
-
- # This program is free software; you can redistribute it and/or
- # modify it under the terms of the GNU General Public License
- # as published by the Free Software Foundation; either version 2
- # of the License, or (at your option) any later version.
-
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
-
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
- #!/usr/bin/env python
-
- from time import time as now
-
- from consts import *
- from GlobalObjects import *
- from TestRecord import *
-
# Names of the message "views" a Pattern can be tested against.  Pattern.run()
# passes the chosen value to getattr() on the message object, so each string
# doubles as the attribute name that is read from the message.
VIEW_SUBJECT = 'subject'
VIEW_SENDER = 'sender'
VIEW_HEADERS = 'headers'
VIEW_BODY = 'body'
VIEW_FILENAMES = 'filenames'
VIEW_CHARSETS = 'charsets'
VIEW_RENDERING = 'rendering'
-
-
class Pattern (object):
    """A pattern test to some aspect (view) of an email message
    ----------------------------------------------------------
    name:             the name of this pattern test (can contain spaces, in Unicode)
    testRecords:      a dict of (view, TestRecord), where view is one of the VIEW_* defined above
    origPattern:      the original pattern, possibly with meta pattern names used in it (Unicode)
    pattern:          a regexp object compiled from an instantiated origPattern
    isManaged:        True if this pattern is managed
    recipientPattern: the regex pattern for the recipient; only when this matches is this
                      pattern in effect (None when no recipient pattern was given)
    encodingPattern:  the regex pattern for the encoding; only when this matches is this
                      pattern in effect (None when no encoding pattern was given)

    'pattern', 'recipientPattern' and 'encodingPattern' are compiled lazily: their slots
    stay empty until first read, at which point __getattr__ compiles and stores them.

    * the following attributes are only set and used in the GUI

    viewsInUse:       a set of views that are currently in use
    metaPatterns:     a set of names of meta patterns used
    """
    # improving performance by not having __dict__
    __slots__ = ('_patTuple', 'name', 'testRecords', 'origPattern', 'pattern', 'isManaged',
                 'recipientPattern', 'encodingPattern', 'viewsInUse', 'metaPatterns')

    def __init__ (self, name, testRecords, pattern, isManaged = False,
                  recipientPattern = None, encodingPattern = None):
        """Store the raw (uncompiled) pattern sources; nothing is compiled here.

        'pattern' is kept as origPattern; the recipient/encoding pattern sources are
        kept in _patTuple until the corresponding attribute is first read."""
        self.name = name
        self.testRecords = testRecords     # thread-safe
        self.origPattern = pattern
        self.isManaged = isManaged
        self._patTuple = (recipientPattern, encodingPattern)

    def _compileOptional (self, source):
        """Compile 'source' into a regexp object; an empty or None source yields None."""
        if source:
            return re.compile(source)
        return None

    def __getattr__ (self, name):
        """Lazy initialization of the compiled patterns.

        Python only calls this when normal lookup fails, i.e. when the slot has not
        been filled yet.  The computed value is stored in the slot, so later reads do
        not come back here.  Raises AttributeError for any other name."""
        if name == 'pattern':
            value = re.compile(globalObjects.metaPatterns.instantiate(self.origPattern))
        elif name == 'recipientPattern':
            value = self._compileOptional(self._patTuple[0])
        elif name == 'encodingPattern':
            value = self._compileOptional(self._patTuple[1])
        else:
            raise AttributeError('No attribute %s in this %s instance.' % (name, self.__class__.__name__))

        setattr(self, name, value)
        return value

    def changePattern (self, newPattern):
        """Replace the original pattern and reset every test record.

        The compiled pattern is dropped so that it is rebuilt from 'newPattern' on the
        next read of self.pattern."""
        try:
            del self.pattern
        except AttributeError:
            # the pattern was never compiled, so there is nothing to invalidate
            pass
        self.origPattern = newPattern
        for testRecord in self.testRecords.values():
            testRecord.reset()

    def changeRecipientPattern (self, newPattern):
        """Set the recipient pattern from 'newPattern'; an empty or None value clears it
        (sets it to None), matching what lazy initialization does."""
        # CAUTION: self._patTuple is left unchanged
        self.recipientPattern = self._compileOptional(newPattern)

    def changeEncodingPattern (self, newPattern):
        """Set the encoding pattern from 'newPattern'; an empty or None value clears it
        (sets it to None), matching what lazy initialization does."""
        # CAUTION: self._patTuple is left unchanged
        self.encodingPattern = self._compileOptional(newPattern)

    def run (self, msg, view):
        """Run the pattern test over 'msg' for 'view'; returns tuple (match, float);
        where match is the match object if a match is found, None otherwise;
        the float is the time spent in the search in usec, measured with time.time()
        (i.e. elapsed wall-clock time)."""

        #assert msg.m is not None   # must be a well-formed email!

        start = now()
        isPositive = self.pattern.search(getattr(msg, view))
        finish = now()

        return isPositive, (finish - start) * 1000000.0

    def runWithText (self, txt):
        """Run the instantiated pattern over a given text; returns a list of text spans (i.e., tuples
        (start, end) ); pattern statistics is not updated."""
        # a list comprehension (rather than map) so the result is a real list everywhere
        return [mo.span(0) for mo in self.pattern.finditer(txt)]

    def getAttribute_ (self, name):
        """This is basically for Obj-C side of PyObjC bridge so we can get at the instance variables"""
        return getattr(self, name)

    def setAttribute_withValue_ (self, name, value):
        """This is basically for Obj-C side of PyObjC bridge so we can set an instance variable"""
        setattr(self, name, value)
-
-
if __name__ == '__main__':
    # Command-line check: run one regex pattern against one view of an email
    # read from a file and report the match (nothing is printed on no match).
    import sys

    if len(sys.argv) < 4:
        # single-argument print(...) behaves the same as the print statement
        print('Usage: ./Pattern.py <filename> <pattern> <view>')
        print(' * filename is the name of the file containing email raw source;')
        print(' * pattern is a regex pattern surrounded by double quotes.')
        print(' * view = one of subject, sender, headers, body, filenames, charsets and rendering.')
        sys.exit(1)

    from Message import *

    view = sys.argv[3]
    aPattern = Pattern(u'Test pattern', {view:TestRecord()}, sys.argv[2])

    # read the raw source and make sure the file handle is released afterwards
    sourceFile = open(sys.argv[1])
    try:
        rawSource = sourceFile.read()
    finally:
        sourceFile.close()
    msg = Message(rawSource)

    mo, cpuTime = aPattern.run(msg, view)
    if mo:
        print(encodeText('* Pattern "%s" matches "%s": %f usec(s)' % (aPattern.name,
                                                                     mo.group(0),
                                                                     cpuTime)))
-